library(tidyverse)
library(tidycensus)
library(sf)
library(gt)
library(gtExtras)
library(factoextra)
library(ggthemes)
library(RColorBrewer)
library(tmap)
library(tigris)
library(webshot2)
library(ggcorrplot)
library(ggspatial)
library(caret)
library(knitr)
library(pscl)
library(plotROC)
library(pROC)
library(lubridate)
library(car)
library(psych)
library(gridExtra)
library(ggpubr)
inflation_factor <- 1.11 # 2012 dollars to 2019 dollars
nash_county <- get_acs(geography = "county",
year=2012,
state=47,
county=037,
geometry=TRUE,
variables = "B25026_001E") %>%
st_transform('ESRI:102271')
chat_county <- get_acs(geography = "county",
year=2012,
state=47,
county=065,
geometry=TRUE,
variables = "B25026_001E") %>%
st_transform('ESRI:102271')
acs_vars_names <- c("TotalPop", "MedHHInc", "MedRent", "MedHomeVal",
"VacHU",
"Poverty",
"PubSNAP",
"OwnerOcc",
"RenterOcc",
"TotalHU",
"MedHUBuilt",
"BlackPop",
"HispLatPop",
"WhitePop",
"AsianPop",
"NatAmPop",
"Bach_deg",
"No_HS_Deg",
"HS_Grad",
"Grad_deg",
"Drive_commute",
"Carpool_commute",
"PubTrans_commute",
"Bike_commute",
"Walk_commute",
"WFA_commute",
"US_cit",
"NotUS_cit",
"EmplPop",
"UnemplPop",
"Occ_Agri",
"Occ_Const",
"Occ_Manuf",
"Occ_Wholesale",
"Occ_Retail",
"Occ_Transport",
"Occ_Info",
"Occ_FinIns",
"Occ_Prof_sci_mangr",
"Occ_Edu_health",
"Occ_Art_Hosp",
"Occ_PubAdmin",
"Occ_ArmedForce",
"Occ_other",
"PubSNAP_rate",
"RenterOcc_rate",
"WhitePop_rate",
"BlackPop_rate",
"HispLatPop_rate",
"Grad_deg_rate",
"PubTrans_commute_ratio",
"Citizen_Ratio")
Nash_census12 <-
get_acs(geography = "tract",
variables = c("B25026_001E", #total pop
"B19013_001E", #med hh inc
"B06012_002E", #poverty
"B19058_002E", #with public assistance income or SNAP
#housing
"B25058_001E", #med rent
"B25077_001E", #med home val
"B25003_002E", #owner occupied
"B25003_003E", #renter occupied
"B25004_001E", #vacant hu
"B25024_001E", #total hu
"B25035_001E", #med year structure built
#race
"B03002_004E", #black pop
"B03002_012E", #hisp / latino pop
"B03002_003E", #white pop
"B03002_006E", #asian pop
"B03002_005E", #native american
#education
"B06009_005E", #bach degree
"B06009_003E", #high school grad or equivilant
"B06009_002E", #less than hs deg
"B06009_006E", #grad or pro degree
#means of transport to work
"B08006_003E", #drove alone
"B08006_004E", #carpooled
"B08006_008E", #public transit
"B08006_014E", #bike
"B08006_015E", #walk
"B08006_017E", #work from home
#citizenship
"B05001_002E", #US citizen born in US
"B05001_006E", #not US citizen
#employment
"B23025_004E", #employed pop 16 & over
"B23025_005E", #unemployed pop 16 & over
"B08126_002E", #agriculture
"B08126_003E", #construction
"B08126_004E", #manufacturing
"B08126_005E", #wholesale trade
"B08126_006E", #retail
"B08126_007E", #transport, warehouse, utilities
"B08126_008E", #information
"B08126_009E", #finance, insurance
"B08126_010E", #pro, sci, management
"B08126_011E", #education & health care
"B08126_012E", #arts, entertainment, rec & hospitality
"B08126_014E", #public admin
"B08126_015E", #armed forces
"B08126_013E"), #other
year=2012, state=47, county=037,
geometry=TRUE, output="wide") %>%
st_transform('ESRI:102271') %>%
rename(TotalPop = B25026_001E,
MedHHInc = B19013_001E,
MedRent = B25058_001E,
MedHomeVal = B25077_001E,
VacHU = B25004_001E,
Poverty = B06012_002E,
PubSNAP = B19058_002E,
OwnerOcc = B25003_002E,
RenterOcc = B25003_003E,
TotalHU = B25024_001E,
MedHUBuilt = B25035_001E,
BlackPop = B03002_004E,
HispLatPop = B03002_012E,
WhitePop = B03002_003E,
AsianPop = B03002_006E,
NatAmPop = B03002_005E,
Bach_deg = B06009_005E,
No_HS_Deg = B06009_002E,
HS_Grad = B06009_003E,
Grad_deg = B06009_006E,
Drive_commute = B08006_003E,
Carpool_commute = B08006_004E,
PubTrans_commute = B08006_008E,
Bike_commute = B08006_014E,
Walk_commute = B08006_015E,
WFA_commute = B08006_017E,
US_cit = B05001_002E,
NotUS_cit = B05001_006E,
EmplPop = B23025_004E,
UnemplPop = B23025_005E,
Occ_Agri = B08126_002E,
Occ_Const = B08126_003E,
Occ_Manuf = B08126_004E,
Occ_Wholesale = B08126_005E,
Occ_Retail = B08126_006E,
Occ_Transport = B08126_007E,
Occ_Info = B08126_008E,
Occ_FinIns = B08126_009E,
Occ_Prof_sci_mangr = B08126_010E,
Occ_Edu_health = B08126_011E,
Occ_Art_Hosp = B08126_012E,
Occ_PubAdmin = B08126_014E,
Occ_ArmedForce = B08126_015E,
Occ_other = B08126_013E)%>%
dplyr::select(-NAME, -ends_with("M")) %>%
mutate(MedHHInc = MedHHInc * inflation_factor,
MedRent = MedRent * inflation_factor,
MedHomeVal = MedHomeVal * inflation_factor,
PubSNAP_rate = PubSNAP/TotalPop,
RenterOcc_rate = RenterOcc/TotalPop,
WhitePop_rate = WhitePop / TotalPop,
BlackPop_rate = BlackPop / TotalPop,
HispLatPop_rate = HispLatPop / TotalPop,
Grad_deg_rate = Grad_deg/TotalPop,
PubTrans_commute_ratio = PubTrans_commute/Drive_commute,
Citizen_Ratio = NotUS_cit/US_cit) %>%
rename_with(~ str_c(., "_12"),
.cols = all_of(acs_vars_names))
options(tigris_use_cache = TRUE)
Chatt_census12 <-
get_acs(geography = "tract",
variables = c("B25026_001E", #total pop
"B19013_001E", #med hh inc
"B06012_002E", #poverty
"B19058_002E", #with public assistance income or SNAP
#housing
"B25058_001E", #med rent
"B25077_001E", #med home val
"B25003_002E", #owner occupied
"B25003_003E", #renter occupied
"B25004_001E", #vacant hu
"B25024_001E", #total hu
"B25035_001E", #med year structure built
#race
"B03002_004E", #black pop
"B03002_012E", #hisp / latino pop
"B03002_003E", #white pop
"B03002_006E", #asian pop
"B03002_005E", #native american
#education
"B06009_005E", #bach degree
"B06009_003E", #high school grad or equivilant
"B06009_002E", #less than hs deg
"B06009_006E", #grad or pro degree
#means of transport to work
"B08006_003E", #drove alone
"B08006_004E", #carpooled
"B08006_008E", #public transit
"B08006_014E", #bike
"B08006_015E", #walk
"B08006_017E", #work from home
#citizenship
"B05001_002E", #US citizen born in US
"B05001_006E", #not US citizen
#employment
"B23025_004E", #employed pop 16 & over
"B23025_005E", #unemployed pop 16 & over
"B08126_002E", #agriculture
"B08126_003E", #construction
"B08126_004E", #manufacturing
"B08126_005E", #wholesale trade
"B08126_006E", #retail
"B08126_007E", #transport, warehouse, utilities
"B08126_008E", #information
"B08126_009E", #finance, insurance
"B08126_010E", #pro, sci, management
"B08126_011E", #education & health care
"B08126_012E", #arts, entertainment, rec & hospitality
"B08126_014E", #public admin
"B08126_015E", #armed forces
"B08126_013E"), #other
year=2012, state=47, county=065,
geometry=TRUE, output="wide") %>%
st_transform('ESRI:102271') %>%
rename(TotalPop = B25026_001E,
MedHHInc = B19013_001E,
MedRent = B25058_001E,
MedHomeVal = B25077_001E,
VacHU = B25004_001E,
Poverty = B06012_002E,
PubSNAP = B19058_002E,
OwnerOcc = B25003_002E,
RenterOcc = B25003_003E,
TotalHU = B25024_001E,
MedHUBuilt = B25035_001E,
BlackPop = B03002_004E,
HispLatPop = B03002_012E,
WhitePop = B03002_003E,
AsianPop = B03002_006E,
NatAmPop = B03002_005E,
Bach_deg = B06009_005E,
No_HS_Deg = B06009_002E,
HS_Grad = B06009_003E,
Grad_deg = B06009_006E,
Drive_commute = B08006_003E,
Carpool_commute = B08006_004E,
PubTrans_commute = B08006_008E,
Bike_commute = B08006_014E,
Walk_commute = B08006_015E,
WFA_commute = B08006_017E,
US_cit = B05001_002E,
NotUS_cit = B05001_006E,
EmplPop = B23025_004E,
UnemplPop = B23025_005E,
Occ_Agri = B08126_002E,
Occ_Const = B08126_003E,
Occ_Manuf = B08126_004E,
Occ_Wholesale = B08126_005E,
Occ_Retail = B08126_006E,
Occ_Transport = B08126_007E,
Occ_Info = B08126_008E,
Occ_FinIns = B08126_009E,
Occ_Prof_sci_mangr = B08126_010E,
Occ_Edu_health = B08126_011E,
Occ_Art_Hosp = B08126_012E,
Occ_PubAdmin = B08126_014E,
Occ_ArmedForce = B08126_015E,
Occ_other = B08126_013E)%>%
dplyr::select(-NAME, -ends_with("M")) %>%
mutate(MedHHInc = MedHHInc * inflation_factor,
MedRent = MedRent * inflation_factor,
MedHomeVal = MedHomeVal * inflation_factor,
PubSNAP_rate = PubSNAP/TotalPop,
RenterOcc_rate = RenterOcc/TotalPop,
WhitePop_rate = WhitePop / TotalPop,
BlackPop_rate = BlackPop / TotalPop,
HispLatPop_rate = HispLatPop / TotalPop,
Grad_deg_rate = Grad_deg/TotalPop,
PubTrans_commute_ratio = PubTrans_commute/Drive_commute,
Citizen_Ratio = NotUS_cit/US_cit) %>%
rename_with(~ str_c(., "_12"),
.cols = all_of(acs_vars_names))
options(tigris_use_cache = TRUE)
Nash_census19 <-
get_acs(geography = "tract",
variables = c("B25026_001E", #total pop
"B19013_001E", #med hh inc
"B06012_002E", #poverty
"B19058_002E", #with public assistance income or SNAP
#housing
"B25058_001E", #med rent
"B25077_001E", #med home val
"B25003_002E", #owner occupied
"B25003_003E", #renter occupied
"B25004_001E", #vacant hu
"B25024_001E", #total hu
"B25035_001E", #med year structure built
#race
"B03002_004E", #black pop
"B03002_012E", #hisp / latino pop
"B03002_003E", #white pop
"B03002_006E", #asian pop
"B03002_005E", #native american
#education
"B06009_005E", #bach degree
"B06009_003E", #high school grad or equivilant
"B06009_002E", #less than hs deg
"B06009_006E", #grad or pro degree
#means of transport to work
"B08006_003E", #drove alone
"B08006_004E", #carpooled
"B08006_008E", #public transit
"B08006_014E", #bike
"B08006_015E", #walk
"B08006_017E", #work from home
#citizenship
"B05001_002E", #US citizen born in US
"B05001_006E", #not US citizen
#employment
"B23025_004E", #employed pop 16 & over
"B23025_005E", #unemployed pop 16 & over
"B08126_002E", #agriculture
"B08126_003E", #construction
"B08126_004E", #manufacturing
"B08126_005E", #wholesale trade
"B08126_006E", #retail
"B08126_007E", #transport, warehouse, utilities
"B08126_008E", #information
"B08126_009E", #finance, insurance
"B08126_010E", #pro, sci, management
"B08126_011E", #education & health care
"B08126_012E", #arts, entertainment, rec & hospitality
"B08126_014E", #public admin
"B08126_015E", #armed forces
"B08126_013E"), #other
year=2019, state=47, county=037,
geometry=TRUE, output="wide") %>%
st_transform('ESRI:102271') %>%
rename(TotalPop = B25026_001E,
MedHHInc = B19013_001E,
MedRent = B25058_001E,
MedHomeVal = B25077_001E,
VacHU = B25004_001E,
Poverty = B06012_002E,
PubSNAP = B19058_002E,
OwnerOcc = B25003_002E,
RenterOcc = B25003_003E,
TotalHU = B25024_001E,
MedHUBuilt = B25035_001E,
BlackPop = B03002_004E,
HispLatPop = B03002_012E,
WhitePop = B03002_003E,
AsianPop = B03002_006E,
NatAmPop = B03002_005E,
Bach_deg = B06009_005E,
No_HS_Deg = B06009_002E,
HS_Grad = B06009_003E,
Grad_deg = B06009_006E,
Drive_commute = B08006_003E,
Carpool_commute = B08006_004E,
PubTrans_commute = B08006_008E,
Bike_commute = B08006_014E,
Walk_commute = B08006_015E,
WFA_commute = B08006_017E,
US_cit = B05001_002E,
NotUS_cit = B05001_006E,
EmplPop = B23025_004E,
UnemplPop = B23025_005E,
Occ_Agri = B08126_002E,
Occ_Const = B08126_003E,
Occ_Manuf = B08126_004E,
Occ_Wholesale = B08126_005E,
Occ_Retail = B08126_006E,
Occ_Transport = B08126_007E,
Occ_Info = B08126_008E,
Occ_FinIns = B08126_009E,
Occ_Prof_sci_mangr = B08126_010E,
Occ_Edu_health = B08126_011E,
Occ_Art_Hosp = B08126_012E,
Occ_PubAdmin = B08126_014E,
Occ_ArmedForce = B08126_015E,
Occ_other = B08126_013E) %>%
dplyr::select(-NAME, -ends_with("M")) %>%
mutate(PubSNAP_rate = PubSNAP/TotalPop,
RenterOcc_rate = RenterOcc/TotalPop,
WhitePop_rate = WhitePop / TotalPop,
BlackPop_rate = BlackPop / TotalPop,
HispLatPop_rate = HispLatPop / TotalPop,
Grad_deg_rate = Grad_deg/TotalPop,
PubTrans_commute_ratio = PubTrans_commute/Drive_commute,
Citizen_Ratio = NotUS_cit/US_cit) %>%
rename_with(~ str_c(., "_19"),
.cols = all_of(acs_vars_names))
options(tigris_use_cache = TRUE)
Chatt_census19 <-
get_acs(geography = "tract",
variables = c("B25026_001E", #total pop
"B19013_001E", #med hh inc
"B06012_002E", #poverty
"B19058_002E", #with public assistance income or SNAP
#housing
"B25058_001E", #med rent
"B25077_001E", #med home val
"B25003_002E", #owner occupied
"B25003_003E", #renter occupied
"B25004_001E", #vacant hu
"B25024_001E", #total hu
"B25035_001E", #med year structure built
#race
"B03002_004E", #black pop
"B03002_012E", #hisp / latino pop
"B03002_003E", #white pop
"B03002_006E", #asian pop
"B03002_005E", #native american
#education
"B06009_005E", #bach degree
"B06009_003E", #high school grad or equivilant
"B06009_002E", #less than hs deg
"B06009_006E", #grad or pro degree
#means of transport to work
"B08006_003E", #drove alone
"B08006_004E", #carpooled
"B08006_008E", #public transit
"B08006_014E", #bike
"B08006_015E", #walk
"B08006_017E", #work from home
#citizenship
"B05001_002E", #US citizen born in US
"B05001_006E", #not US citizen
#employment
"B23025_004E", #employed pop 16 & over
"B23025_005E", #unemployed pop 16 & over
"B08126_002E", #agriculture
"B08126_003E", #construction
"B08126_004E", #manufacturing
"B08126_005E", #wholesale trade
"B08126_006E", #retail
"B08126_007E", #transport, warehouse, utilities
"B08126_008E", #information
"B08126_009E", #finance, insurance
"B08126_010E", #pro, sci, management
"B08126_011E", #education & health care
"B08126_012E", #arts, entertainment, rec & hospitality
"B08126_014E", #public admin
"B08126_015E", #armed forces
"B08126_013E"), #other
year=2019, state=47, county=065,
geometry=TRUE, output="wide") %>%
st_transform('ESRI:102271') %>%
rename(TotalPop = B25026_001E,
MedHHInc = B19013_001E,
MedRent = B25058_001E,
MedHomeVal = B25077_001E,
VacHU = B25004_001E,
Poverty = B06012_002E,
PubSNAP = B19058_002E,
OwnerOcc = B25003_002E,
RenterOcc = B25003_003E,
TotalHU = B25024_001E,
MedHUBuilt = B25035_001E,
BlackPop = B03002_004E,
HispLatPop = B03002_012E,
WhitePop = B03002_003E,
AsianPop = B03002_006E,
NatAmPop = B03002_005E,
Bach_deg = B06009_005E,
No_HS_Deg = B06009_002E,
HS_Grad = B06009_003E,
Grad_deg = B06009_006E,
Drive_commute = B08006_003E,
Carpool_commute = B08006_004E,
PubTrans_commute = B08006_008E,
Bike_commute = B08006_014E,
Walk_commute = B08006_015E,
WFA_commute = B08006_017E,
US_cit = B05001_002E,
NotUS_cit = B05001_006E,
EmplPop = B23025_004E,
UnemplPop = B23025_005E,
Occ_Agri = B08126_002E,
Occ_Const = B08126_003E,
Occ_Manuf = B08126_004E,
Occ_Wholesale = B08126_005E,
Occ_Retail = B08126_006E,
Occ_Transport = B08126_007E,
Occ_Info = B08126_008E,
Occ_FinIns = B08126_009E,
Occ_Prof_sci_mangr = B08126_010E,
Occ_Edu_health = B08126_011E,
Occ_Art_Hosp = B08126_012E,
Occ_PubAdmin = B08126_014E,
Occ_ArmedForce = B08126_015E,
Occ_other = B08126_013E)%>%
dplyr::select(-NAME, -ends_with("M")) %>%
mutate(PubSNAP_rate = PubSNAP/TotalPop,
RenterOcc_rate = RenterOcc/TotalPop,
WhitePop_rate = WhitePop / TotalPop,
BlackPop_rate = BlackPop / TotalPop,
HispLatPop_rate = HispLatPop / TotalPop,
Grad_deg_rate = Grad_deg/TotalPop,
PubTrans_commute_ratio = PubTrans_commute/Drive_commute,
Citizen_Ratio = NotUS_cit/US_cit) %>%
rename_with(~ str_c(., "_19"),
.cols = all_of(acs_vars_names))
Nash_census <- Nash_census12 %>%
filter(TotalPop_12 != 0) %>%
st_drop_geometry() %>%
left_join(Nash_census19, by = c("GEOID")) %>%
st_sf() %>%
mutate(chng_MedHHInc = MedHHInc_19 - MedHHInc_12,
chng_MedRent = MedRent_19 - MedRent_12,
chng_MedHomeVal = MedHomeVal_19 - MedHomeVal_12,
chng_PubSNAP_rate = PubSNAP_rate_19 - PubSNAP_rate_12,
chng_RenterOcc_rate = RenterOcc_rate_19 - RenterOcc_rate_12,
chng_WhitePop_rate = WhitePop_rate_19 - WhitePop_rate_12,
chng_BlackPop_rate = BlackPop_rate_19 - BlackPop_rate_12,
chng_HispLatPop_rate = HispLatPop_rate_19 - HispLatPop_rate_12,
chng_Grad_deg_rate = Grad_deg_rate_19 - Grad_deg_rate_12,
chng_PubTrans_commute_ratio = PubTrans_commute_ratio_19 - PubTrans_commute_ratio_12,
chng_Citizen_Ratio = Citizen_Ratio_19 - Citizen_Ratio_12,
chng_MedHUBuilt = MedHUBuilt_19 - MedHUBuilt_12)
Chatt_census <- Chatt_census12 %>%
filter(TotalPop_12 != 0) %>%
st_drop_geometry() %>%
left_join(Chatt_census19, by = c("GEOID")) %>%
st_sf() %>%
mutate(chng_MedHHInc = MedHHInc_19 - MedHHInc_12,
chng_MedRent = MedRent_19 - MedRent_12,
chng_MedHomeVal = MedHomeVal_19 - MedHomeVal_12,
chng_PubSNAP_rate = PubSNAP_rate_19 - PubSNAP_rate_12,
chng_RenterOcc_rate = RenterOcc_rate_19 - RenterOcc_rate_12,
chng_WhitePop_rate = WhitePop_rate_19 - WhitePop_rate_12,
chng_BlackPop_rate = BlackPop_rate_19 - BlackPop_rate_12,
chng_HispLatPop_rate = HispLatPop_rate_19 - HispLatPop_rate_12,
chng_Grad_deg_rate = Grad_deg_rate_19 - Grad_deg_rate_12,
chng_PubTrans_commute_ratio = PubTrans_commute_ratio_19 - PubTrans_commute_ratio_12,
chng_Citizen_Ratio = Citizen_Ratio_19 - Citizen_Ratio_12,
chng_MedHUBuilt = MedHUBuilt_19 - MedHUBuilt_12)
options(tigris_use_cache = TRUE)
# within 0.25 miles of a bus stop
nash_bus_stps <- read.csv("Data/WeGoBus_Stops.csv") %>%
mutate(lat_long = str_extract(Mapped.Location, "\\(.*?\\)")) %>%
mutate(lat_long = str_replace_all(lat_long, "[\\(\\)]", ""))%>%
separate(lat_long, into = c("latitude", "longitude"), sep = ",\\s*") %>%
na.omit(Mapped.Location) %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
chat_bus_stps <- st_read("Data/CARTA_Stops.geojson") %>%
st_transform(st_crs(nash_bus_stps))
nash_bus_buffer <- st_buffer(nash_bus_stps, 1320) %>%
st_union()
chat_bus_buffer <- st_buffer(chat_bus_stps, 1320) %>%
st_union()
Nash_census <-
rbind(
st_centroid(Nash_census)[nash_bus_buffer,] %>%
st_drop_geometry() %>%
left_join(Nash_census) %>%
st_sf() %>%
mutate(near_bus = 1),
st_centroid(Nash_census)[nash_bus_buffer, op = st_disjoint] %>%
st_drop_geometry() %>%
left_join(Nash_census) %>%
st_sf() %>%
mutate(near_bus = 0))
Chatt_census <-
rbind(
st_centroid(Chatt_census)[chat_bus_buffer,] %>%
st_drop_geometry() %>%
left_join(Chatt_census) %>%
st_sf() %>%
mutate(near_bus = 1),
st_centroid(Chatt_census)[chat_bus_buffer, op = st_disjoint] %>%
st_drop_geometry() %>%
left_join(Chatt_census) %>%
st_sf() %>%
mutate(near_bus = 0))
# number of public art pieces
nash_pub_art <- read.csv("Data/nash_art_public_space.csv") %>%
filter(!is.na(Latitude) | !is.na(Longitude)) %>%
st_as_sf(coords = c(x = "Longitude", y = "Latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
chat_pub_art <- read.csv("Data/chat_art_public_space.csv") %>%
filter(!is.na(Latitude) | !is.na(Longitude)) %>%
st_as_sf(coords = c(x = "Longitude", y = "Latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
Nash_census <- Nash_census %>%
mutate(n_pub_art = lengths(st_intersects(., nash_pub_art)))
Chatt_census <- Chatt_census %>%
mutate(n_pub_art = lengths(st_intersects(., chat_pub_art)))
# number of crimes
nash_crime <- read.csv("Data/nash_crime.csv") %>%
filter(!grepl("2024|2023|2022|2021|2020", Incident.Occurred))
chat_crime <- read.csv("Data/chat_crime.csv")
nash_violent_crime_19 <- nash_crime %>%
filter(grepl("2019", Incident.Occurred) &
Weapon.Description != "Unarmed" &
(!is.na(Latitude) | !is.na(Longitude))) %>%
st_as_sf(coords = c(x = "Longitude", y = "Latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
nash_violent_crime_18 <- nash_crime %>%
filter(grepl("2018", Incident.Occurred) &
Weapon.Description != "Unarmed" &
(!is.na(Latitude) | !is.na(Longitude))) %>%
st_as_sf(coords = c(x = "Longitude", y = "Latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
chat_violent_crime_19 <- chat_crime %>%
filter(grepl("2019", Date_Incident) &
Incident_Description != "Robbery" &
(!is.na(Latitude) | !is.na(Longitude))) %>%
st_as_sf(coords = c(x = "Longitude", y = "Latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
chat_violent_crime_18 <- chat_crime %>%
filter(grepl("2018", Date_Incident) &
Incident_Description != "Robbery" &
(!is.na(Latitude) | !is.na(Longitude))) %>%
st_as_sf(coords = c(x = "Longitude", y = "Latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
Nash_census <- Nash_census %>%
mutate(n_crime_19 = lengths(st_intersects(., nash_violent_crime_19)),
n_crime_18 = lengths(st_intersects(., nash_violent_crime_18)),
pct_chng_crime = (n_crime_19 - n_crime_18) / n_crime_18)
Chatt_census <- Chatt_census %>%
mutate(n_crime_19 = lengths(st_intersects(., chat_violent_crime_19)),
n_crime_18 = lengths(st_intersects(., chat_violent_crime_18)),
pct_chng_crime = (n_crime_19 - n_crime_18) / n_crime_18)
# 311 calls
nash_311 <- read.csv("Data/nash_311.csv")
chat_311 <- read.csv("Data/chat_311.csv",
na.strings = c(""," ","NA"))
nash_311_19 <- nash_311 %>%
filter(grepl("2019", Date...Time.Opened) &
(!is.na(Latitude) | !is.na(Longitude))) %>%
st_as_sf(coords = c(x = "Longitude", y = "Latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
nash_311_18 <- nash_311 %>%
filter(grepl("2018", Date...Time.Opened) &
(!is.na(Latitude) | !is.na(Longitude))) %>%
st_as_sf(coords = c(x = "Longitude", y = "Latitude"), crs = 4269) %>%
st_transform('ESRI:102271')
chat_311_19 <- chat_311 %>%
filter(grepl("2019", Created.Date) & !is.na(PublicLocation)) %>%
st_as_sf(wkt = "PublicLocation", crs = 4269) %>%
st_transform('ESRI:102271')
chat_311_18 <- chat_311 %>%
filter(grepl("2018", Created.Date) & !is.na(PublicLocation)) %>%
st_as_sf(wkt = "PublicLocation", crs = 4269) %>%
st_transform('ESRI:102271')
Nash_census <- Nash_census %>%
mutate(n_311_19 = lengths(st_intersects(., nash_311_19)),
n_311_18 = lengths(st_intersects(., nash_311_18)),
pct_chng_311 = (n_311_19 - n_311_18) / n_311_18)
Chatt_census <- Chatt_census %>%
mutate(n_311_19 = lengths(st_intersects(., chat_311_19)),
n_311_18 = lengths(st_intersects(., chat_311_18)),
pct_chng_311 = (n_311_19 - n_311_18) / n_311_18)
Census Tract Cluster Analysis
We use a k-means clustering technique to select for four distinct
types of census tracts in our study cities. We use seven variables (see
table below) to measure both 2019 metrics in census tracts and the
change in those same tracts between 2012 and 2019.
variables <- c("Median Home Value", "Renter Occupied Rate", "White Population Rate", "Black Population Rate", "Hispanic / Latino Population Rate", "College Degree Rate", "SNAP Recipient Rate")
kmeans_vars <- as.data.frame(variables)
kmeans_vars %>%
gt() %>%
tab_header(title = "K-means clustering variables") %>%
gt_theme_nytimes()
| K-means clustering variables |
| variables |
| Median Home Value |
| Renter Occupied Rate |
| White Population Rate |
| Black Population Rate |
| Hispanic / Latino Population Rate |
| College Degree Rate |
| SNAP Recipient Rate |
# set seed and cluster number
set.seed(123)
k <- 4
#### Chattanooga
#Scaling the data (code from here: https://stackoverflow.com/questions/15215457/standardize-data-columns-in-r)
Chatt_scaled <- Chatt_census %>%
select(c(GEOID, MedHHInc_19, MedHomeVal_19, RenterOcc_rate_19,
chng_MedHHInc, chng_MedHomeVal, chng_RenterOcc_rate,
WhitePop_rate_19, BlackPop_rate_19, HispLatPop_rate_19,
chng_WhitePop_rate, chng_BlackPop_rate, chng_HispLatPop_rate,
Grad_deg_rate_19, chng_Grad_deg_rate,
PubSNAP_rate_19, chng_PubSNAP_rate)) %>%
st_drop_geometry() %>%
mutate_at(c(2:17), ~(scale(.) %>%
as.vector)) %>%
na.omit()
km.out_Chatt <- kmeans(Chatt_scaled, centers = k, nstart = 20)
##Assigning Cluster IDs
Chatt_scaled$cluster_id <- factor(km.out_Chatt$cluster)
Chatt_census <- Chatt_census %>%
left_join(Chatt_scaled %>%
select(GEOID, cluster_id),
by = c("GEOID")) %>%
mutate(chng_cluster = case_when(cluster_id == "1" ~ 1,
TRUE ~ 0))
#### Nashville
Nash_scaled <- Nash_census %>%
select(c(GEOID, MedHHInc_19, MedHomeVal_19, RenterOcc_rate_19,
chng_MedHHInc, chng_MedHomeVal, chng_RenterOcc_rate,
WhitePop_rate_19, BlackPop_rate_19, HispLatPop_rate_19,
chng_WhitePop_rate, chng_BlackPop_rate, chng_HispLatPop_rate,
Grad_deg_rate_19, chng_Grad_deg_rate,
PubSNAP_rate_19, chng_PubSNAP_rate)) %>%
st_drop_geometry() %>%
mutate_at(c(2:17), ~(scale(.) %>%
as.vector)) %>%
na.omit()
km.out_Nash <- kmeans(Nash_scaled, centers = k, nstart = 20)
##Assigning Cluster IDs
Nash_scaled$cluster_id <- factor(km.out_Nash$cluster)
Nash_census <- Nash_census %>%
left_join(Nash_scaled %>%
select(GEOID, cluster_id),
by = c("GEOID")) %>%
mutate(chng_cluster = case_when(cluster_id == "2" ~ 1,
TRUE ~ 0))
Nash_clusters <- Nash_census %>%
select(MedHHInc_19, MedHomeVal_19, RenterOcc_rate_19,
chng_MedHHInc, chng_MedHomeVal, chng_RenterOcc_rate,
WhitePop_rate_19, BlackPop_rate_19, HispLatPop_rate_19,
chng_WhitePop_rate, chng_BlackPop_rate, chng_HispLatPop_rate,
Grad_deg_rate_19, chng_Grad_deg_rate,
PubSNAP_rate_19, chng_PubSNAP_rate, cluster_id) %>%
st_drop_geometry() %>%
group_by(cluster_id) %>%
summarise_all("mean") %>%
na.omit()
## Economic profile table
nash_clusters_econ <- Nash_clusters %>%
select(c(1:7, 14:17))
nash_clusters_econ <- nash_clusters_econ[,c(1,2,5,3,6,4,7,8:11)]
nash_clusters_econ %>%
gt() %>%
fmt_percent(columns = c(6:11),
decimals = 1) %>%
fmt_currency(columns = c(2:5),
decimals = 0) %>%
cols_label(cluster_id = "Cluster",
MedHHInc_19 = "Median HH Income",
MedHomeVal_19 = "Median Home Value",
RenterOcc_rate_19 = "% Renters",
chng_MedHHInc = "Change Median Income",
chng_MedHomeVal = "Change Home Values",
chng_RenterOcc_rate = "Change % Renters",
Grad_deg_rate_19 = "% Grad Degree",
chng_Grad_deg_rate = "Change % Grad Degree",
PubSNAP_rate_19 = "% Recieiving SNAP",
chng_PubSNAP_rate = "Change % Recieiving SNAP") %>%
tab_header(title = "Economic Profile of Clusters for Nashville",
subtitle = "2019 and change sicne 2012 (Source: 2012 & 2019 ACS 5-year Estimate) ") %>%
cols_width(2:11 ~ px(200)) %>%
gt_theme_nytimes()
| Economic Profile of Clusters for Nashville |
| 2019 and change sicne 2012 (Source: 2012 & 2019 ACS 5-year Estimate) |
| Cluster |
Median HH Income |
Change Median Income |
Median Home Value |
Change Home Values |
% Renters |
Change % Renters |
% Grad Degree |
Change % Grad Degree |
% Recieiving SNAP |
Change % Recieiving SNAP |
| 1 |
$57,474 |
$7,700 |
$247,313 |
$59,117 |
23.1% |
0.6% |
9.7% |
1.7% |
5.2% |
−1.4% |
| 2 |
$53,616 |
$12,704 |
$249,253 |
$97,796 |
21.2% |
−0.1% |
10.3% |
3.8% |
8.2% |
−2.7% |
| 3 |
$54,516 |
$8,650 |
$205,085 |
$50,549 |
17.5% |
−0.2% |
6.8% |
2.1% |
6.1% |
−2.8% |
| 4 |
$82,182 |
$11,653 |
$346,307 |
$62,704 |
17.8% |
−0.1% |
15.0% |
2.3% |
3.3% |
−1.5% |
##Demographic profile table
nash_clusters_demo <- Nash_clusters %>%
select(c(1,8:13))
nash_clusters_demo <- nash_clusters_demo[,c(1,2,5,3,6,4,7)]
nash_clusters_demo %>%
gt() %>%
fmt_percent(columns = c(2:7),
decimals =1) %>%
cols_label(cluster_id = "Cluster",
WhitePop_rate_19 = "% White",
BlackPop_rate_19 = "% Black",
HispLatPop_rate_19 = "% Hispanic or Latino",
chng_WhitePop_rate = "Change % White",
chng_BlackPop_rate = "Change % Black",
chng_HispLatPop_rate = "Change % Hispanic or Latino") %>%
tab_header(title = "Demographic Profile of Clusters for Nashville",
subtitle = "2019 and change since 2012 (Source: 2012 & 2019 ACS 5-year Estimate) ") %>%
cols_width(2:7 ~ px(200)) %>%
gt_theme_nytimes()
| Demographic Profile of Clusters for Nashville |
| 2019 and change since 2012 (Source: 2012 & 2019 ACS 5-year Estimate) |
| Cluster |
% White |
Change % White |
% Black |
Change % Black |
% Hispanic or Latino |
Change % Hispanic or Latino |
| 1 |
59.0% |
−0.6% |
29.1% |
−1.7% |
12.2% |
2.3% |
| 2 |
43.4% |
7.4% |
53.0% |
−10.0% |
3.8% |
−0.2% |
| 3 |
52.0% |
−1.2% |
36.0% |
−0.7% |
8.9% |
0.9% |
| 4 |
67.4% |
−1.4% |
13.9% |
−1.0% |
12.2% |
−0.0% |
Chatt_clusters <- Chatt_census %>%
select(MedHHInc_19, MedHomeVal_19, RenterOcc_rate_19,
chng_MedHHInc, chng_MedHomeVal, chng_RenterOcc_rate,
WhitePop_rate_19, BlackPop_rate_19, HispLatPop_rate_19,
chng_WhitePop_rate, chng_BlackPop_rate, chng_HispLatPop_rate,
Grad_deg_rate_19, chng_Grad_deg_rate,
PubSNAP_rate_19, chng_PubSNAP_rate, cluster_id) %>%
st_drop_geometry() %>%
group_by(cluster_id) %>%
summarise_all("mean") %>%
na.omit()
## Economic profile table
chatt_clusters_econ <- Chatt_clusters %>%
select(c(1:7, 14:17))
chatt_clusters_econ <- chatt_clusters_econ[,c(1,2,5,3,6,4,7,8:11)]
chatt_clusters_econ %>%
gt() %>%
fmt_percent(columns = c(6:11),
decimals = 1) %>%
fmt_currency(columns = c(2:5),
decimals = 0) %>%
cols_label(cluster_id = "Cluster",
MedHHInc_19 = "Median HH Income",
MedHomeVal_19 = "Median Home Value",
RenterOcc_rate_19 = "% Renters",
chng_MedHHInc = "Change Median Income",
chng_MedHomeVal = "Change Home Values",
chng_RenterOcc_rate = "Change % Renters",
Grad_deg_rate_19 = "% Grad Degree",
chng_Grad_deg_rate = "Change % Grad Degree",
PubSNAP_rate_19 = "% Recieiving SNAP",
chng_PubSNAP_rate = "Change % Recieiving SNAP") %>%
tab_header(title = "Economic Profile of Clusters for Chattanooga",
subtitle = "2019 and change since 2012 (Source: 2012 & 2019 ACS 5-year Estimate) ") %>%
cols_width(2:11 ~ px(200)) %>%
gt_theme_nytimes()
| Economic Profile of Clusters for Chattanooga |
| 2019 and change since 2012 (Source: 2012 & 2019 ACS 5-year Estimate) |
| Cluster |
Median HH Income |
Change Median Income |
Median Home Value |
Change Home Values |
% Renters |
Change % Renters |
% Grad Degree |
Change % Grad Degree |
% Recieiving SNAP |
Change % Recieiving SNAP |
| 1 |
$44,023 |
$7,132 |
$179,040 |
$38,614 |
23.8% |
1.6% |
9.8% |
4.2% |
10.0% |
−2.1% |
| 2 |
$60,548 |
$2,384 |
$182,169 |
$17,227 |
12.7% |
−0.3% |
7.7% |
1.7% |
3.7% |
−1.0% |
| 3 |
$64,725 |
$1,929 |
$207,823 |
$7,203 |
14.6% |
1.5% |
9.0% |
1.0% |
4.5% |
−0.1% |
| 4 |
$40,714 |
$8,005 |
$160,517 |
$362 |
25.3% |
0.9% |
6.6% |
1.7% |
10.0% |
−1.9% |
##Demographic profile table
chatt_clusters_demo <- Chatt_clusters %>%
select(c(1,8:13))
chatt_clusters_demo <- chatt_clusters_demo[,c(1,2,5,3,6,4,7)]
chatt_clusters_demo %>%
gt() %>%
fmt_percent(columns = c(2:7),
decimals =1) %>%
cols_label(cluster_id = "Cluster",
WhitePop_rate_19 = "% White",
BlackPop_rate_19 = "% Black",
HispLatPop_rate_19 = "% Hispanic or Latino",
chng_WhitePop_rate = "Change % White",
chng_BlackPop_rate = "Change % Black",
chng_HispLatPop_rate = "Change % Hispanic or Latino") %>%
tab_header(title = "Demographic Profile of Clusters for Chattanooga",
subtitle = "2019 and change since 2012 (Source: 2012 & 2019 ACS 5-year Estimate) ") %>%
cols_width(2:7 ~ px(200)) %>%
gt_theme_nytimes()
| Demographic Profile of Clusters for Chattanooga |
| 2019 and change since 2012 (Source: 2012 & 2019 ACS 5-year Estimate) |
| Cluster |
% White |
Change % White |
% Black |
Change % Black |
% Hispanic or Latino |
Change % Hispanic or Latino |
| 1 |
50.7% |
4.7% |
45.5% |
−5.2% |
4.1% |
−1.4% |
| 2 |
90.1% |
−2.3% |
3.9% |
0.3% |
3.1% |
0.4% |
| 3 |
74.9% |
−0.3% |
23.0% |
1.3% |
5.8% |
1.3% |
| 4 |
45.9% |
1.7% |
40.0% |
−5.4% |
15.0% |
1.8% |
Exploring collinearity
The plot below shows how all of our selected census and spatial
variables interact with one another. While some have high positive
correlation, shown in red (median household income and rate of graduate
degrees, for example), we pay particular attention to those with
negative correlation, shown in blue, as they may be important indicators
of neighborhood change or gentrification. Variables with the strongest
negative correlation include percent black with percent white,
indicating the way neighborhoods are racially segregated, and change in
percent black with percent white, indicating the way neighborhood change
often occurs along a Black/White racial binary. Negative correlations
for percent Hispanic / Latino are similar, although less intense than
for Black demographics.
vars_corr <- Nash_census %>%
st_drop_geometry() %>%
select(MedHHInc_19, MedHomeVal_19, RenterOcc_rate_19,
chng_MedHHInc, chng_MedHomeVal, chng_RenterOcc_rate,
WhitePop_rate_19, BlackPop_rate_19, HispLatPop_rate_19,
chng_WhitePop_rate, chng_BlackPop_rate, chng_HispLatPop_rate,
Grad_deg_rate_19, chng_Grad_deg_rate,
PubSNAP_rate_19, chng_PubSNAP_rate,
near_bus, n_pub_art, pct_chng_crime, pct_chng_311) %>%
na.omit()
vars_corr <- round(cor(vars_corr), 2)
p.mat <- corr.test(vars_corr)$p
corr_axis_labs <- c(MedHHInc_19 = "Median HH Income",
MedHomeVal_19 = "Median Home Value",
RenterOcc_rate_19 = "% Renters",
chng_MedHHInc = "Change Median Income",
chng_MedHomeVal = "Change Home Values",
chng_RenterOcc_rate = "Change % Renters",
WhitePop_rate_19 = "% White",
BlackPop_rate_19 = "% Black",
HispLatPop_rate_19 = "% Hispanic or Latino",
chng_WhitePop_rate = "Change % White",
chng_BlackPop_rate = "Change % Black",
chng_HispLatPop_rate = "Change % Hispanic or Latino",
Grad_deg_rate_19 = "% Grad Degree",
chng_Grad_deg_rate = "Change % Grad Degree",
PubSNAP_rate_19 = "% Recieiving SNAP",
chng_PubSNAP_rate = "Change % Recieiving SNAP",
near_bus = "Within .25 mi of bus",
n_pub_art = "Public art pieces",
pct_chng_crime = "Annual % Change Crime",
pct_chng_311 = "Annual % Change 311")
ggcorrplot(vars_corr, type = "lower", hc.order = TRUE, p.mat = p.mat, insig = "blank") +
scale_x_discrete(labels = corr_axis_labs) +
scale_y_discrete(labels = corr_axis_labs) +
labs(title = "Correlation Matrix for Nashville")+
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
panel.background = element_rect(fill = "#f0f0f0", color = "#ffffff", linewidth = 2))

Mapping variables of interest
Cluster IDs
Nash_census <- Nash_census %>%
mutate(cluster_name = case_when(
cluster_id == "1" ~ "Potentially Gentrifying",
cluster_id == "2" ~ "Rapidly Gentrifying",
cluster_id == "3" ~ "Resisting Gentrification",
cluster_id == "4" ~ "Increasingly Affluent",
TRUE ~ "Other"
))
Chatt_census <- Chatt_census %>%
mutate(cluster_name = case_when(
cluster_id == "1" ~ "Rapidly Gentrifying",
cluster_id == "2" ~ "Potentially Gentrifying",
cluster_id == "3" ~ "Increasingly Affluent",
cluster_id == "4" ~ "Resisting Gentrification",
TRUE ~ "Other"
))
Nash_cluster_map <- ggplot() +
geom_sf(data = nash_county,
fill = "#f7f7f7",
color = NA) +
geom_sf(data = subset(Nash_census, cluster_name != "Other"),
aes(fill = cluster_name),
color = "#d9d9d9") +
scale_fill_brewer(name = "Cluster",
type = "qual",
palette = "Set3",
na.value = "#f7f7f7") +
geom_sf(data = nash_county,
fill = "transparent",
color = "#525252",
lwd = 0.3,
lty = "longdash") +
theme_void()
Chatt_cluster_map <- ggplot() +
geom_sf(data = chat_county,
fill = "#f7f7f7",
color = NA) +
geom_sf(data = subset(Chatt_census, cluster_name != "Other"),
aes(fill = cluster_name),
color = "#d9d9d9") +
scale_fill_brewer(name = "Cluster",
type = "qual",
palette = "Set3",
na.value = "#f7f7f7") +
geom_sf(data = chat_county,
fill = "transparent",
color = "#525252",
lwd = 0.3,
lty = "longdash") +
theme_void()
grid.arrange(Nash_cluster_map, Chatt_cluster_map, ncol = 2)

Crime
Nash_crime_map <- ggplot() +
geom_sf(data = nash_county,
fill = "#f7f7f7",
color = NA) +
geom_sf(data = Nash_census,
aes(fill = pct_chng_crime),
color = "#d9d9d9") +
scale_fill_distiller(name = "Annual % Change in Crime",
type = "div",
labels = scales::label_percent(),
palette = "RdBu",
direction = -1,
na.value = "#f7f7f7") +
geom_sf(data = nash_county,
fill = "transparent",
color = "#525252",
lwd = 0.3,
lty = "longdash") +
labs(title = "Nashville (Davidson County)") +
annotation_scale(location = "bl",
unit_category = "imperial") +
theme_void() +
theme(legend.position = "bottom")
Chatt_crime_map <- ggplot() +
geom_sf(data = chat_county,
fill = "#f7f7f7",
color = NA) +
geom_sf(data = Chatt_census,
aes(fill = pct_chng_crime),
color = "#d9d9d9") +
scale_fill_distiller(name = "Annual % Change in Crime",
type = "div",
labels = scales::label_percent(),
palette = "RdBu",
direction = -1,
na.value = "#f7f7f7") +
geom_sf(data = chat_county,
fill = "transparent",
color = "#525252",
lwd = 0.3,
lty = "longdash") +
labs(title = "Chattanooga (Hamilton County)") +
theme_void() +
theme(legend.position = "bottom",
legend.text = element_text(angle = 45, hjust = 1))
crime_map <- grid.arrange(Nash_crime_map, Chatt_crime_map, ncol = 2)

ggsave(filename = "Exports/crime_map.png",
plot = crime_map,
width = 8,
height = 6,
units = "in")
Access to transit
Nash_census <- Nash_census %>%
mutate(near_bus_yn = case_when(near_bus == 1 ~ "Yes",
near_bus == 0 ~ "No"))
Nash_bus_map <- ggplot() +
geom_sf(data = nash_county,
fill = "#f7f7f7",
color = NA) +
geom_sf(data = Nash_census,
aes(fill = near_bus_yn),
color = "#d9d9d9") +
scale_fill_manual(name = "Within 0.25 miles of bus stop",
values = c("#fed976", "#4292c6"),
labels = c("No", "Yes"),
na.value = "#f7f7f7") +
geom_sf(data = nash_county,
fill = "transparent",
color = "#525252",
lwd = 0.3,
lty = "longdash") +
labs(title = "Nashville (Davidson County)") +
annotation_scale(location = "bl",
unit_category = "imperial") +
theme_void() +
theme(legend.position = "bottom")
Chatt_census <- Chatt_census %>%
mutate(near_bus_yn = case_when(near_bus == 1 ~ "Yes",
near_bus == 0 ~ "No"))
Chat_bus_map <- ggplot() +
geom_sf(data = chat_county,
fill = "#f7f7f7",
color = NA) +
geom_sf(data = Chatt_census,
aes(fill = near_bus_yn),
color = "#d9d9d9") +
scale_fill_manual(name = "Within 0.25 miles of bus stop",
values = c("#fed976", "#4292c6"),
labels = c("No", "Yes"),
na.value = "#f7f7f7") +
geom_sf(data = chat_county,
fill = "transparent",
color = "#525252",
lwd = 0.3,
lty = "longdash") +
labs(title = "Chattanooga (Hamilton County)") +
theme_void() +
theme(legend.position = "bottom")
bus_map <- grid.arrange(Nash_bus_map, Chat_bus_map, ncol = 2)

ggsave(filename = "Exports/bus_map.png",
plot = bus_map,
width = 8,
height = 6.5,
units = "in")
Logit models to predict change cluster in Nashville
nash_change_logit <- glm(chng_cluster ~ MedHHInc_12+MedRent_12+ MedHUBuilt_12+ PubSNAP_12+ RenterOcc_12 + WhitePop_12 + BlackPop_rate_12 + HispLatPop_rate_12+ Grad_deg_rate_12 + PubTrans_commute_12 + Citizen_Ratio_12,
family="binomial" (link="logit"), Nash_census)
summary(nash_change_logit)
nash_change_logit2 <- glm(chng_cluster ~ near_bus + n_crime_18 + n_pub_art,
family="binomial" (link="logit"), Nash_census)
summary(nash_change_logit2)
nash_change_logit3 <- glm(chng_cluster ~ MedHHInc_12+ MedHUBuilt_12+ RenterOcc_12 + BlackPop_rate_12 + PubTrans_commute_12 + Citizen_Ratio_12 + near_bus + n_crime_18,
family="binomial" (link="logit"), Nash_census)
summary(nash_change_logit3)
nash_change_logit4 <- glm(chng_cluster ~ MedHHInc_12 + BlackPop_rate_12 + HispLatPop_12 + Grad_deg_rate_12 ,
family="binomial" (link="logit"), Nash_census)
summary(nash_change_logit4)
Confusion matrices
Nash_census <- Nash_census %>%
mutate(outcome = as.factor(chng_cluster),
probs = predict(nash_change_logit, Nash_census, type= "response"),
pred_outcome = as.factor(case_when(probs >= 0.5 ~ 1,
probs < 0.5 ~ 0)))
cm_nash <- confusionMatrix(Nash_census$pred_outcome, Nash_census$outcome,
positive = "1")
cm_table_nash <- cm_nash$table
cm_df_nash <- as.data.frame(cm_table_nash)
rownames(cm_df_nash) <- c("True Negative", "False Positive",
"False Negative", "True Positive")
gt(cm_df_nash %>%
rownames_to_column()) %>%
fmt_number(columns = Freq, decimals = 0) %>%
tab_header(title = md("**Confusion Matrix** *for Nashville*")) %>%
opt_align_table_header(align = "left") %>%
cols_label(Freq = "Frequency")
| Confusion Matrix for Nashville |
|
Prediction |
Reference |
Frequency |
| True Negative |
0 |
0 |
90 |
| False Positive |
1 |
0 |
47 |
| False Negative |
0 |
1 |
18 |
| True Positive |
1 |
1 |
1 |
Chatt_census <- Chatt_census %>%
mutate(outcome = as.factor(chng_cluster),
probs = predict(nash_change_logit4, Chatt_census, type= "response"),
pred_outcome = as.factor(case_when(probs >= 0.5 ~ 1,
probs < 0.5 ~ 0)))
cm_chatt <- confusionMatrix(Chatt_census$pred_outcome, Chatt_census$outcome,
positive = "1")
cm_table_chatt <- cm_chatt$table
cm_df_chatt <- as.data.frame(cm_table_chatt)
rownames(cm_df_chatt) <- c("True Negative", "False Positive",
"False Negative", "True Positive")
gt(cm_df_chatt %>%
rownames_to_column()) %>%
fmt_number(columns = Freq, decimals = 0) %>%
tab_header(title = md("**Confusion Matrix** *for Chattanooga*")) %>%
opt_align_table_header(align = "left") %>%
cols_label(Freq = "Frequency")
| Confusion Matrix for Chattanooga |
|
Prediction |
Reference |
Frequency |
| True Negative |
0 |
0 |
58 |
| False Positive |
1 |
0 |
12 |
| False Negative |
0 |
1 |
8 |
| True Positive |
1 |
1 |
2 |
testProbs_chatt <- data.frame(Outcome = as.factor(Chatt_census$chng_cluster),
Probs = predict(nash_change_logit4, Chatt_census, type= "response"))
testProbs_nash <- data.frame(Outcome = as.factor(Nash_census$chng_cluster),
Probs = predict(nash_change_logit4, Nash_census, type= "response"))
roc_chatt <- ggplot(testProbs_chatt, aes(d = as.numeric(Outcome), m = Probs)) +
geom_roc(n.cuts = 50, labels = FALSE, colour = "#1f78b4") +
style_roc(theme = theme_grey) +
geom_abline(slope = 1, intercept = 0, size = 1.5, color = 'grey') +
labs(title = "ROC Curve: Chattanooga Model") +
theme_minimal() +
theme(plot.title = element_text(face = "bold", margin = margin(b = 6)))
roc_nash <- ggplot(testProbs_nash, aes(d = as.numeric(Outcome), m = Probs)) +
geom_roc(n.cuts = 50, labels = FALSE, colour = "#1f78b4") +
style_roc(theme = theme_grey) +
geom_abline(slope = 1, intercept = 0, size = 1.5, color = 'grey') +
labs(title = "ROC Curve: Nashville Model") +
theme_minimal() +
theme(plot.title = element_text(face = "bold", margin = margin(b = 6)))
ggarrange(roc_nash, roc_chatt)
